library(reshape2)
library(estimatr)
library(plyr)
library(foreign)
library(ltm)
library(MCMCpack)
library(wfe)
library(devtools)
library(panelView)
library(lme4)
library(arm)
library(Matching)
library(rgenoud)
library(cem)
library(quickmatch)
library(zoo)
library(dplyr)
library(did)
library(gsynth)
library(DataCombine)
library(broom)
library(stargazer)
library(parallel)
library(margins)
library(ggplot2)
library(readstata13)

#
gc()



# # Calculate the number of cores
# num_cores <- detectCores() - 1
# 
# # Initiate cluster
# cl <- makeCluster(num_cores)
gc()

###State FIPS codes
fips_codes <- read.csv("fips_codes_website.csv")


data <- readRDS("policy_data_updated.RDS")

##Load New Early Voting Data

ev <- read.csv("Early Voting Coding - Sheet1.csv")

data$early_voting_narrow
data$early_voting_broad

data <- join(data, ev[,c("state","year_eip","year_amv","year_abs")])

for(i in levels(factor(data$state))){
  
  data$early_voting_person[data$state==i & 
                             (data$year>=data$year_eip) & 
                             data$year<=2014] <- 1
  
  data$early_voting_narrow[data$state==i & 
                             (data$year>=data$year_eip|data$year>=data$year_amv) & 
                             data$year<=2014] <- 1
  data$early_voting_broad[data$state==i & 
                            (data$year>=data$year_eip|data$year>=data$year_amv|data$year>=data$year_abs) & 
                            data$year<=2014] <- 1
  
}

data$early_voting_narrow[is.na(data$early_voting_narrow) & data$year<=2014] <- 0
data$early_voting_broad[is.na(data$early_voting_broad) & data$year<=2014] <- 0
data$early_voting_person[is.na(data$early_voting_person) & data$year<=2014] <- 0


presidential_years <- c(1976,1980,1984,1988,1992,1996,2000,2004,2008,2012,2016)


######################################Load and Clean CPS Individual Data######################################

cps <- read.csv("cps_00021.csv"); gc()

names(cps) <- tolower((names(cps)))

#Removing "refused to answer"
cps$voted_alt <- cps$voted - 1
cps$voted_alt[cps$voted_alt>1] <- NA

#Main dependent variable
cps$voted[cps$voted==99] <- NA
cps$voted <- cps$voted - 1
cps$voted[cps$voted>1] <- 0

cps$faminc[cps$faminc>843] <- NA
cps$faminc <- as.numeric(factor(cps$faminc))

cps$sex[cps$sex>2] <- NA

cps$educ[cps$educ==999|cps$educ==1] <- NA

cps$educ <- as.numeric(factor(cps$educ))

#merge state FIPS codes
cps <- join(cps, fips_codes[,c("st","statefip")], match="first")

#Merge state policy data
cps <- join(cps, data, by=c("st","year")); gc()

cps$age_group[cps$age<=24] <- "18-24"
cps$age_group[cps$age>24 & cps$age<35] <- "25-34"
cps$age_group[cps$age>34 & cps$age<45] <- "35-44"
cps$age_group[cps$age>44 & cps$age<55] <- "45-54"
cps$age_group[cps$age>54 & cps$age<65] <- "55-64"
cps$age_group[cps$age>64] <- "65+"

cps$presidential_year[cps$year %in% presidential_years] <- 1
cps$presidential_year[(cps$year %in% presidential_years)==F] <- 0

cps$sex[cps$sex==0] <- NA
cps$sex <- cps$sex - 1

cps$race_5[cps$race==100] <- "White"
cps$race_5[cps$race==200] <- "Black"
cps$race_5[cps$race==300 | cps$race>=700] <- "Other"
cps$race_5[(cps$race>=600 & cps$race<700) | cps$race==809] <- "Asian"
cps <- within(cps, race_5 <- relevel(factor(race_5), ref = "White"))
cps <- cps[cps$year>=1978,]

cps$asian[cps$race_5=="Asian"] <- 1
cps$asian[cps$race_5!="Asian"] <- 0


gc()

#age_group dummy variables
cps <- cbind(cps, data.frame(model.matrix(~ age_group - 1, data=cps)))

#race dummy variables
cps <- cbind(cps, data.frame(model.matrix(~ as.character(race) - 1, data=cps)))
names(cps) <- gsub("as.character.", "", names(cps))

#registration
cps$reg_pollingplace[cps$voreghow<99 & cps$voreghow!=7] <- 0
cps$reg_pollingplace[cps$voreghow<99 & cps$voreghow==7] <- 1

cps$vote_reg[cps$voreg<=98 & cps$voreg!=2] <- 0
cps$vote_reg[cps$voreg<=98 & cps$voreg==2] <- 1


gc()

#
cps$movedless1yr[cps$voteres<14] <- 1
cps$movedless1yr[cps$voteres>=14 & cps$voteres<900] <- 0

#Remove DC
cps <- cps[cps$st!="DC",]; gc()


####Change Working Directory to output tables & plots
setwd("./Output")



##############################Descriptive Plots##############################
paneldata <- data[data$year>=1978,c("st","year","sdr","early_voting_narrow", "early_voting_broad", "early_voting_person", "pop_annual")]
paneldata$st <- as.character(paneldata$st)


pdf("PanelView - SDR (new).pdf", h=7, w=12)
panelView(D="sdr", 
          data=paneldata,
          #D="sdr",
          #by.timing = T,
          pre.post = TRUE,
          #data=paneldata,
          index=c("st","year"),
          xlab="Year",
          ylab="State",
          main="SDR",
          legend.labs = c("Control Observations", "Treated Observations"),
          color=c("white","black"),
          theme.bw = T)
dev.off()


pdf("PanelView - Early Voting (person).pdf", h=7, w=12)
panelView(D="early_voting_person",
          data=paneldata,
          index=c("st","year"),
          xlab="Year",
          ylab="State",
          main="Early Voting (in Person)",
          legend.labs = c("Control Observations", "Treated Observations"),
          color=c("white","black"),
          theme.bw = T)
dev.off()


pdf("PanelView - Early Voting (narrow).pdf", h=7, w=12)
panelView(D="early_voting_narrow",
          data=paneldata,
          index=c("st","year"),
          xlab="Year",
          ylab="State",
          main="Early Voting (Preferred Def.)",
          legend.labs = c("Control Observations", "Treated Observations"),
          color=c("white","black"),
          theme.bw = T)
dev.off()

pdf("PanelView - Early Voting (broad).pdf", h=7, w=12)
panelView(D="early_voting_broad",
          data=paneldata,
          index=c("st","year"),
          xlab="Year",
          ylab="State",
          main="Early Voting (Broad Def.)",
          legend.labs = c("Control Observations", "Treated Observations"),
          color=c("white","black"),
          theme.bw = T)
dev.off()


plotdata_wide <- aggregate(cbind(faminc, age, educ, race.100,race.200,asian) ~ sdr + year, cps[cps$year>=1982,], mean, na.rm=T)

plotdata_wide$race.100 <- plotdata_wide$race.100*100
plotdata_wide$race.200 <- plotdata_wide$race.200*100
plotdata_wide$asian <- plotdata_wide$asian*100


plotdata <- reshape(plotdata_wide, 
                    varying = names(plotdata_wide)[3:length(names(plotdata_wide))], 
                    v.names = "mean",
                    timevar = "variable", 
                    times = names(plotdata_wide)[3:length(names(plotdata_wide))],
                    direction = "long")

plotdata$variable[plotdata$variable=="race.100"] <- "White"
plotdata$variable[plotdata$variable=="race.200"] <- "Black"
plotdata$variable[plotdata$variable=="asian"] <- "Asian"
plotdata$variable[plotdata$variable=="faminc"] <- "Income"
plotdata$variable[plotdata$variable=="age"] <- "Age"
plotdata$variable[plotdata$variable=="educ"] <- "Education"

pdf("descriptive_demographics_byyear.pdf", h=5, w=7)
ggplot(plotdata, aes(x=year, y=mean, group=interaction(variable, sdr), shape=factor(sdr), color=factor(sdr))) +
  geom_line(size=1) +
  #geom_point() +
  facet_wrap(~variable) +
  scale_color_manual(values=c("grey","black"), name="SDR", labels=c("No", "Yes")) +
  xlab("Year") +
  ylab("") +
  theme_classic()
dev.off()

rm(plotdata, plotdata_wide); gc()


#########################################################################################################
################################################ANALYSIS#################################################
#########################################################################################################


################Figure 2 (bivariate)
agg_cps_age_sdr <- aggregate(voted ~ sdr + age_group, cps, mean, na.rm=T)
write.csv(agg_cps_age_sdr, "agg_cps_age_sdr.csv", na="", row.names = F)

pd <- position_dodge(.75)

pdf("bivariate_plot_age.pdf", h=4, w=5)
ggplot(agg_cps_age_sdr, aes(y=voted, x=age_group, fill=factor(sdr)))+
  geom_bar(width = .75, stat = "identity", position=pd, color="black") + 
  scale_fill_manual(values=c("grey","black"), labels=c("non-SDR", "SDR"), name="") +
  ylab("Pr(Voted)") +
  xlab("Age") +
  theme_classic()
dev.off()

################Appendix Figure (presidential, bivariate)

agg_cps_age_presidential_sdr <- aggregate(voted ~ sdr + age_group + presidential_year, cps, mean, na.rm=T)

agg_cps_age_presidential_sdr$prez[agg_cps_age_presidential_sdr$presidential_year==1] <- "Presidential"
agg_cps_age_presidential_sdr$prez[agg_cps_age_presidential_sdr$presidential_year==0] <- "Non-Presidential"

pdf("bivariate_plot_age_POTUS.pdf", h=4, w=8)
ggplot(agg_cps_age_presidential_sdr, aes(y=voted, x=age_group, fill=factor(sdr)))+
  geom_bar(width = .75, stat = "identity", position=pd, color="black") + 
  scale_fill_manual(values=c("grey","black"), labels=c("non-SDR", "SDR"), name="") +
  ylab("Pr(Voted)") +
  xlab("Age") +
  facet_wrap(~prez) +
  theme_classic()
dev.off()





################################################
################Diff in Diff####################         THE MAIN ANALYSIS
################################################

#########Aggregating#############
gc()

agg_cps <- aggregate(voted ~ state + year + sdr + early_voting_narrow + early_voting_broad + presidential_year, cps, mean, na.rm=T)
agg_cps_age <- aggregate(voted ~ state + year + sdr + presidential_year + age_group + 
                           povrate + pct_black_i + pct_white_i, cps, mean, na.rm=T)

cps$white[cps$race.100==1] <- 1
cps$white[cps$race.100!=1] <- 0
cps$black[cps$race.200==1] <- 1
cps$black[cps$race.200!=1] <- 0

cps$asian[cps$race>200 & cps$race<800] <- 1
cps$asian[cps$race<=200 | cps$race>=800] <- 0

race_means_agg_cps_age <- aggregate(cbind(white, black, asian, faminc, educ) ~ state + year + age_group, cps, mean, na.rm=T)

agg_cps_age <- join(agg_cps_age, race_means_agg_cps_age)


agg_cps_age <- agg_cps_age[order(agg_cps_age$state, agg_cps_age$age_group, agg_cps_age$year),]



################Aggregate Diff in Diff Specifications################

output <- list()

did_table_list <- list()

for(j in c("sdr")){
  
  agg_cps_age$temp_var <- agg_cps_age[,"sdr"]
  
  output_wfe <- list()
  output_ols <- list()
  output_wfe_controls <- list()
  output_ols_controls <- list()
  
  for(i in levels(factor(agg_cps_age$age_group))){
    
    tempdata <- agg_cps_age[agg_cps_age$age_group==i,]
    
    tempdata$state <- as.character(tempdata$state)
    
    temp <- wfe(voted ~ temp_var, tempdata, treat = "temp_var",
                unit.index="state", time.index = "year", method = "unit",
                qoi = "ate", estimator = "did", C.it = NULL,
                hetero.se = TRUE, auto.se = TRUE, #df.adjustment = F,
                White = TRUE, White.alpha = 0.05,
                verbose = T, unbiased.se = T, unweighted = FALSE,
                store.wdm = FALSE, maxdev.did = NULL,
                tol = sqrt(.Machine$double.eps))
    
    wfe_temp <- matrix(nrow = 4, ncol=5)
    wfe_temp[1,1] <- j
    wfe_temp[1,2] <- temp$coefficients
    wfe_temp[1,3] <- temp$se
    wfe_temp[1,4] <- i
    wfe_temp[1,5] <- "WFE"
    wfe_temp <- data.frame(wfe_temp)
    names(wfe_temp) <- c("term", "estimate", "std.error", "group", "model")
    output_wfe[[i]] <- wfe_temp[complete.cases(wfe_temp),]
    
    temp <- tidy(lm(voted ~ temp_var + presidential_year + factor(state) + factor(year), tempdata))
    cluster_se_temp <- as.numeric(commarobust(lm(voted ~ temp_var + presidential_year + factor(state) + factor(year), tempdata),
                                              se_type="HC1")$std.error)
    temp$std.error <- cluster_se_temp[!is.na(cluster_se_temp)]
    
    temp$group <- i
    temp$term[temp$term=="temp_var"] <- j
    temp$model <- "OLS"
    output_ols[[i]] <- temp[temp$term==j,]
    if(j=="sdr"){did_table_list[[i]] <- temp}
    
    temp <- tidy(lm(voted ~ temp_var + white + black + asian + povrate + educ + presidential_year + factor(state) + factor(year), tempdata))
    cluster_se_temp <- as.numeric(commarobust(lm(voted ~ temp_var + white + black + asian + povrate + educ + presidential_year + factor(state) + factor(year), tempdata),
                                              se_type="HC1")$std.error)
    temp$std.error <- cluster_se_temp[!is.na(cluster_se_temp)]
    
    temp$group <- i
    temp$term[temp$term=="temp_var"] <- j
    temp$model <- "OLS (controls)"
    output_ols_controls[[i]] <- temp[temp$term==j,]
    
    if(j=="sdr"){did_table_list[[i]] <- rbind(did_table_list[[i]], temp)}
    
    gc()
    
  }
  
  output[[j]] <- do.call(rbind.fill, c(output_wfe, 
                                       output_ols, 
                                       output_ols_controls))
  
}
did_table <- do.call(rbind.fill, c(did_table_list))

did_table$model[did_table$model=="OLS"] <- "Bivariate"
did_table$model[did_table$model=="OLS (controls)"] <- "Controls"


did_table[,c("estimate", "std.error", "statistic","p.value")] <- round(did_table[,c("estimate", "std.error", "statistic","p.value")], digits=2)

write.csv(did_table[grepl("factor", did_table$term)==F,], "did_tables_statelevel_full.csv", na="", row.names=F)


plotdata <- do.call(rbind.fill, c(output))

plotdata[, 2:3] <- apply(plotdata[, 2:3], 2, function(x) as.numeric(as.character(x)))
plotdata[, 2:3] <- plotdata[, 2:3]*100
plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)

plotdata$term[plotdata$term=="sdr"] <- "Same Day Reg."
plotdata$term[plotdata$term=="early_voting_narrow"] <- "Early Voting (Restrictive Def.)"
plotdata$term[plotdata$term=="early_voting_broad"] <- "Early Voting (Broad Def.)"
plotdata$term[plotdata$term=="voterid"] <- "Voter ID"

plotdata$Model <- plotdata$model

write.csv(plotdata, "diffindiff_estimates_cps5.csv", na="")


################Individual Level Diff in Diff################
rm(list=ls(pattern="indiv"))
rm(list=ls(pattern="mlm"))
gc()

age_groups <- c("65+", "18\n-\n24","25\n-\n34","35\n-\n44","45\n-\n54","55\n-\n64")



#SDR

indiv_sdr <- lm_robust(voted ~ sdr*age_group18.24 + 
                         sdr*age_group25.34 + 
                         sdr*age_group35.44 + 
                         sdr*age_group45.54 + 
                         sdr*age_group55.64 + 
                         factor(race) + sex + faminc + educ + factor(year) + factor(state),
                       clusters=state, se_type="stata",
                       cps)

indiv_sdr_tidy <- estimatr::tidy(indiv_sdr)
indiv_sdr_tidy$Model <- "Individual Level\n(controls)"
indiv_sdr_tidy <- indiv_sdr_tidy[grepl("sdr", indiv_sdr_tidy$term),]
indiv_sdr_tidy$age_group <- age_groups
indiv_sdr_tidy$treat <- "SDR"

indiv_sdr_bv <- lm_robust(voted ~ sdr*age_group18.24 + 
                            sdr*age_group25.34 + 
                            sdr*age_group35.44 + 
                            sdr*age_group45.54 + 
                            sdr*age_group55.64 + 
                            factor(year) + factor(state), 
                          clusters=state, se_type="stata",
                          cps)

indiv_sdr_bv_tidy <- estimatr::tidy(indiv_sdr_bv)
indiv_sdr_bv_tidy$Model <- "Individual Level\n(bivariate)"
indiv_sdr_bv_tidy <- indiv_sdr_bv_tidy[grepl("sdr", indiv_sdr_bv_tidy$term),]
indiv_sdr_bv_tidy$age_group <- age_groups
indiv_sdr_bv_tidy$treat <- "SDR"

did_table <- rbind(estimatr::tidy(indiv_sdr_bv), estimatr::tidy(indiv_sdr))
did_table[,c("estimate", "std.error", "statistic", "p.value")] <- round(did_table[,c("estimate", "std.error", "statistic", "p.value")], digits=2)

write.csv(did_table, "did_tables_indivlevel_full.csv")



#SDR with three state-year FEs

cps$decade[cps$year<=1992] <- "1992"
cps$decade[cps$year>=1994 & cps$year<=2004] <- "2004"
cps$decade[cps$year>=2006] <- "2018"
cps$state_decade <- paste(cps$st, cps$decade, sep="_")

indiv_sdr_sdFE <- lm_robust(voted ~ sdr*age_group18.24 + 
                              sdr*age_group25.34 + 
                              sdr*age_group35.44 + 
                              sdr*age_group45.54 + 
                              sdr*age_group55.64 + 
                              factor(race) + sex + faminc + educ + 
                              factor(state_decade),
                            clusters=state, se_type="stata",
                            cps)

indiv_sdr_sdFE_tidy <- estimatr::tidy(indiv_sdr_sdFE)
indiv_sdr_sdFE_tidy$Model <- "Individual Level\n(controls, state-decade FEs)"
indiv_sdr_sdFE_tidy <- indiv_sdr_sdFE_tidy[grepl("sdr", indiv_sdr_sdFE_tidy$term),]
indiv_sdr_sdFE_tidy$age_group <- age_groups
indiv_sdr_sdFE_tidy$treat <- "SDR"

indiv_sdr_sdFE_bv <- lm_robust(voted ~ sdr*age_group18.24 + 
                                 sdr*age_group25.34 + 
                                 sdr*age_group35.44 + 
                                 sdr*age_group45.54 + 
                                 sdr*age_group55.64 + 
                                 factor(state_decade), 
                               clusters=state, se_type="stata",
                               cps)

indiv_sdr_sdFE_bv_tidy <- estimatr::tidy(indiv_sdr_sdFE_bv)
indiv_sdr_sdFE_bv_tidy$Model <- "Individual Level\n(bivariate, state-decade FEs)"
indiv_sdr_sdFE_bv_tidy <- indiv_sdr_sdFE_bv_tidy[grepl("sdr", indiv_sdr_sdFE_bv_tidy$term),]
indiv_sdr_sdFE_bv_tidy$age_group <- age_groups
indiv_sdr_sdFE_bv_tidy$treat <- "SDR"

did_table <- rbind(estimatr::tidy(indiv_sdr_sdFE_bv), estimatr::tidy(indiv_sdr_sdFE))
did_table[,c("estimate", "std.error", "statistic", "p.value")] <- round(did_table[,c("estimate", "std.error", "statistic", "p.value")], digits=2)

write.csv(did_table, "did_tables_indivlevel_statedecadeFEs_full.csv")





#SDR with state-time trends

indiv_sdr_timetrends <- lm_robust(voted ~ sdr*age_group18.24 + 
                                    sdr*age_group25.34 + 
                                    sdr*age_group35.44 + 
                                    sdr*age_group45.54 + 
                                    sdr*age_group55.64 + 
                                    factor(race) + sex + faminc + educ + 
                                    factor(year) + 
                                    factor(state)*year,
                                  clusters=state, se_type="stata",
                                  cps)

indiv_sdr_timetrends_tidy <- estimatr::tidy(indiv_sdr_timetrends)
indiv_sdr_timetrends_tidy$Model <- "Individual Level\n(controls, state-time trends)"
indiv_sdr_timetrends_tidy <- indiv_sdr_timetrends_tidy[grepl("sdr", indiv_sdr_timetrends_tidy$term),]
indiv_sdr_timetrends_tidy$age_group <- age_groups
indiv_sdr_timetrends_tidy$treat <- "SDR"

indiv_sdr_timetrends_bv <- lm_robust(voted ~ sdr*age_group18.24 + 
                                       sdr*age_group25.34 + 
                                       sdr*age_group35.44 + 
                                       sdr*age_group45.54 + 
                                       sdr*age_group55.64 + 
                                       factor(year) + 
                                       factor(state)*year, 
                                     clusters=state, se_type="stata",
                                     cps)

indiv_sdr_timetrends_bv_tidy <- estimatr::tidy(indiv_sdr_timetrends_bv)
indiv_sdr_timetrends_bv_tidy$Model <- "Individual Level\n(bivariate, state-time trends)"
indiv_sdr_timetrends_bv_tidy <- indiv_sdr_timetrends_bv_tidy[grepl("sdr", indiv_sdr_timetrends_bv_tidy$term),]
indiv_sdr_timetrends_bv_tidy$age_group <- age_groups
indiv_sdr_timetrends_bv_tidy$treat <- "SDR"

did_table <- rbind(estimatr::tidy(indiv_sdr_timetrends_bv), estimatr::tidy(indiv_sdr_timetrends))
did_table[,c("estimate", "std.error", "statistic", "p.value")] <- round(did_table[,c("estimate", "std.error", "statistic", "p.value")], digits=2)

write.csv(did_table, "did_tables_indivlevel_statetimetrends_full.csv")



#Early Voting (Narrow)
indiv_ev <- lm_robust(voted ~ early_voting_narrow*age_group18.24 + 
                        early_voting_narrow*age_group25.34 + 
                        early_voting_narrow*age_group35.44 + 
                        early_voting_narrow*age_group45.54 + 
                        early_voting_narrow*age_group55.64 + 
                        factor(race) + sex + faminc + educ + factor(year) + factor(state), 
                      clusters=state, se_type="stata",
                      cps)

indiv_ev_tidy <- estimatr::tidy(indiv_ev)
indiv_ev_tidy$Model <- "Individual Level\n(controls)"
indiv_ev_tidy <- indiv_ev_tidy[grepl("early_voting_narrow", indiv_ev_tidy$term),]
indiv_ev_tidy$age_group <- age_groups
indiv_ev_tidy$treat <- "Early Voting"


indiv_ev_bv <- lm_robust(voted ~ early_voting_narrow*age_group18.24 + 
                           early_voting_narrow*age_group25.34 + 
                           early_voting_narrow*age_group35.44 + 
                           early_voting_narrow*age_group45.54 + 
                           early_voting_narrow*age_group55.64 + 
                           factor(year) + factor(state), 
                         clusters=state, se_type="stata",
                         cps)

indiv_ev_bv_tidy <- estimatr::tidy(indiv_ev_bv)
indiv_ev_bv_tidy$Model <- "Individual Level\n(bivariate)"
indiv_ev_bv_tidy <- indiv_ev_bv_tidy[grepl("early_voting_narrow", indiv_ev_bv_tidy$term),]
indiv_ev_bv_tidy$age_group <- age_groups
indiv_ev_bv_tidy$treat <- "Early Voting"

gc()

plotdata <- rbind.fill(indiv_sdr_tidy, indiv_sdr_bv_tidy, indiv_ev_tidy, indiv_ev_bv_tidy, 
                       indiv_sdr_sdFE_tidy, indiv_sdr_sdFE_bv_tidy, indiv_sdr_timetrends_tidy, indiv_sdr_timetrends_bv_tidy)

plotdata$aggregate <- "Individual\nLevel"

#summing for marginal effects
for(i in levels(factor(plotdata$Model))){
  
  for(j in levels(factor(plotdata$treat))){
    
    plotdata$estimate[plotdata$Model==i & plotdata$treat==j &
                        grepl(":", plotdata$term)] <-
      plotdata$estimate[plotdata$Model==i & plotdata$treat==j &
                          grepl(":", plotdata$term)] +
      plotdata$estimate[plotdata$Model==i & plotdata$treat==j &
                          (plotdata$term=="sdr"|plotdata$term=="early_voting_narrow")]
    
    plotdata$std.error[plotdata$Model==i & plotdata$treat==j & 
                         (plotdata$term=="sdr"|plotdata$term=="early_voting_narrow")] <-
      plotdata$std.error[plotdata$Model==i & plotdata$treat==j & 
                           grepl(":", plotdata$term)][2]
    
  }
  
}


plotdata[,c("estimate","std.error")] <- 100*plotdata[,c("estimate","std.error")] 
plotdata$cilow <- plotdata$estimate-(1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate+(1.96*plotdata$std.error)


plotdata <- rbind.fill(plotdata, read.csv("diffindiff_estimates_cps5.csv", stringsAsFactors = F))

plotdata$treat[grepl("Same", plotdata$term)] <- "SDR"
plotdata$treat[grepl("Early", plotdata$term)] <- "Early Voting"
plotdata$age_group[!is.na(plotdata$group) & grepl("fixed", plotdata$group)==F] <- 
  plotdata$group[!is.na(plotdata$group) & grepl("fixed", plotdata$group)==F]

plotdata$age_group[grepl("65", plotdata$age_group)] <- "65+"
plotdata$age_group[grepl("18", plotdata$age_group)] <- "18\n-\n24"
plotdata$age_group[grepl("25", plotdata$age_group)] <- "25\n-\n34"
plotdata$age_group[grepl("35", plotdata$age_group)] <- "35\n-\n44"
plotdata$age_group[grepl("45", plotdata$age_group)] <- "45\n-\n54"
plotdata$age_group[grepl("55", plotdata$age_group)] <- "55\n-\n64"

plotdata$aggregate[is.na(plotdata$aggregate)] <- "Aggregated\nby State"

plotdata$Model[plotdata$Model=="OLS" & plotdata$aggregate=="Aggregated\nby State"] <- "State Level\n(bivariate)"
plotdata$Model[plotdata$Model=="OLS (controls)" & plotdata$aggregate=="Aggregated\nby State"] <- "State Level\n(controls)"

plotdata$Model[plotdata$Model=="WFE"] <- "State Level WFE"

write.csv(plotdata, "diffindiff_estimate_all_20.6.10.csv", na="")

pd <- position_dodge(.5)

est_ranges <- plotdata[plotdata$treat=="SDR" &
                         grepl("MLM", plotdata$Model)==F & plotdata$age_group!="18\n-\n24",]

pdf("diffindiff_cps_RR4.pdf", h=7, w=9)
ggplot(plotdata[plotdata$treat=="SDR" &
                  grepl("MLM", plotdata$Model)==F,], aes(x=age_group, y=estimate)) +
  #geom_point(data=data[data$year>=1980 & !is.na(data$sdr),], aes(x=factor(sdr), y=vep.highest.office), alpha=.03) +
  geom_point(size=2.5, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, size=1, position=pd) +
  facet_wrap(~Model) +
  #scale_y_continuous(breaks=c(0,5,10,15), limits=c(0,15)) +
  #scale_x_discrete(breaks=c(0,1)) +
  #scale_color_manual(values=c("black", "black", "grey"), guide=F) +
  geom_hline(yintercept=0, linetype=2) +
  #scale_x_discrete(name="", limits = rev(levels(factor(plotdata$age_group)))) +
  #coord_flip() +
  ylab("Effect on Turnout") +
  xlab("Age Group") +
  theme_bw()
dev.off()


####Replicating in Stata
setwd("..")
write.dta(cps[!is.na(cps$voted),], "cps_clean_stata.dta", convert.factors = "string")
setwd("./Output")


#Alternative covariate coding (Appendix Tables A15 and A16)

alt_cov <- lm_robust(voted ~ sdr*age_group18.24 + 
                       sdr*age_group25.34 + 
                       sdr*age_group35.44 + 
                       sdr*age_group45.54 + 
                       sdr*age_group55.64 + 
                       factor(race) + sex + factor(faminc) + factor(educ) + factor(year) + factor(state),
                     clusters=state, se_type="stata",
                     cps)

alt_cov_tidy <- estimatr::tidy(alt_cov)
alt_cov_tidy$Model <- "Individual Level\n(controls)"
alt_cov_tidy$treat <- "SDR"


alt_cov2 <- lm_robust(voted ~ sdr*age_group18.24 + 
                        sdr*age_group25.34 + 
                        sdr*age_group35.44 + 
                        sdr*age_group45.54 + 
                        sdr*age_group55.64 + 
                        factor(race) + sex + factor(faminc) + factor(educ) + factor(year) + factor(state)*year,
                      clusters=state, se_type="stata",
                      cps)

alt_cov2_tidy <- estimatr::tidy(alt_cov2)
alt_cov2_tidy$Model <- "Individual Level\n(controls, state-time trends)"
alt_cov2_tidy$treat <- "SDR"

plot_alt <- rbind(alt_cov_tidy, alt_cov2_tidy)
plot_alt <- plot_alt[grepl("state", plot_alt$term)==F & grepl("year", plot_alt$term)==F,]

write.csv(plot_alt, "DiD_alt_covs.csv", row.names = F)




##########Individual Level DiD POTUS nonPOTUS (Figure 4)###############



ols_4 <- lm(voted ~ sdr*age_group18.24*presidential_year + 
              sdr*age_group25.34*presidential_year + 
              sdr*age_group35.44*presidential_year + 
              sdr*age_group45.54*presidential_year + 
              sdr*age_group55.64*presidential_year + 
              factor(race) + sex + faminc + educ + factor(year) + factor(state), 
            cps)
clusters_4 <- as.character(expand.model.frame(ols_4, ~st, na.expand = T)$st)
fit <- commarobust(ols_4, clusters=clusters_4, se_type = "stata")

write.csv(estimatr::tidy(fit), "potus_nonpotus_full_int.csv", row.names = F)




#OLS
ols_1 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                presidential_year + factor(year) + factor(state), 
              cps[cps$presidential_year==1,])


ols_2 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(race) + sex + faminc + educ + presidential_year + factor(year) + factor(state), 
              cps[cps$presidential_year==1,])

ols_3 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                presidential_year + factor(year) + factor(state), 
              cps[cps$presidential_year==0,])


ols_4 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(race) + sex + faminc + educ + presidential_year + factor(year) + factor(state), 
              cps[cps$presidential_year==0,])

clusters_1 <- as.character(expand.model.frame(ols_1, ~st, na.expand = T)$st)
clusters_2 <- as.character(expand.model.frame(ols_2, ~st, na.expand = T)$st)
clusters_3 <- as.character(expand.model.frame(ols_3, ~st, na.expand = T)$st)
clusters_4 <- as.character(expand.model.frame(ols_4, ~st, na.expand = T)$st)

fit_1_r <- commarobust(ols_1, clusters=clusters_1, se_type = "stata")
fit_2_r <- commarobust(ols_2, clusters=clusters_2, se_type = "stata")
fit_3_r <- commarobust(ols_3, clusters=clusters_3, se_type = "stata")
fit_4_r <- commarobust(ols_4, clusters=clusters_4, se_type = "stata")


stargazer(ols_1, ols_2, ols_3, ols_4, se = starprep(fit_1_r, fit_2_r, fit_4_r, fit_4_r, clusters = cps$st), 
          out="POTUS_nonPOTUS_DiD_individual_cps_ols_age.html", type="html", 
          omit = c("year","race", "state"),
          omit.labels = c("Race Dummies", "Year FEs", "State FEs"),
          star.cutoffs = c(0.05,0.01,0.001))

rm(list=ls(pattern="logit_"))
rm(list=ls(pattern="ols_")); gc()


###Plot

model_1 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(race) + sex + faminc + educ + factor(year) + factor(state),
              cps[cps$presidential_year==1,])

ols_1 <- tidy(model_1)

clusters_1 <- as.character(expand.model.frame(model_1, ~st, na.expand = T)$st)
ols_1$std.error <- commarobust(model_1, clusters = clusters_1, se_type="stata")$std.error

rm(model_1, clusters_1); gc()


model_2 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(race) + sex + faminc + educ + factor(year) + factor(state),
              cps[cps$presidential_year==0,])
ols_2 <- tidy(model_2)

clusters_2 <- as.character(expand.model.frame(model_2, ~st, na.expand = T)$st)
ols_2$std.error <- commarobust(model_2, clusters = clusters_2, se_type="stata")$std.error

rm(model_2, clusters_2); gc()

ols_1$presidential_year <- 1
ols_2$presidential_year <- 0

ols_1$estimate[grepl("sdr:", ols_1$term)] <- ols_1$estimate[grepl("sdr:", ols_1$term)] + ols_1$estimate[ols_1$term=="sdr"]
ols_2$estimate[grepl("sdr:", ols_2$term)] <- ols_2$estimate[grepl("sdr:", ols_2$term)] + ols_2$estimate[ols_2$term=="sdr"]

plotdata <- rbind(ols_1[grepl("sdr", ols_1$term),], ols_2[grepl("sdr", ols_2$term),])
plotdata$age_group <- rep(c("65+", "18-24","25-34","35-44","45-54","55-64"), 2)

plotdata[,c("estimate","std.error")] <- 100*plotdata[,c("estimate","std.error")]

plotdata$cihigh <- plotdata$estimate + 1.96*plotdata$std.error
plotdata$cilow <- plotdata$estimate - 1.96*plotdata$std.error

write.csv(plotdata, "POTUS_nonPOTUS_individual_DiD_results.csv", na="")

pd <- position_dodge(.5)

pdf("mfx_presidential_elections_DiD_controls.pdf", h=4, w=6)
ggplot(plotdata, aes(x=age_group, y=estimate, color=factor(presidential_year))) +
  #geom_point(data=data[data$year>=1980 & !is.na(data$sdr),], aes(x=factor(sdr), y=vep.highest.office), alpha=.03) +
  geom_point(size=2.5, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, size=1, position=pd) +
  #facet_wrap(~term) +
  #scale_y_continuous(breaks=c(0,5,10,15), limits=c(0,15)) +
  #scale_x_discrete(breaks=c(0,1)) +
  scale_color_manual(values=c("grey", "black"), name="Presidential\nElection") +
  geom_hline(yintercept=0, linetype=2) +
  coord_flip() +
  ylab("Effect on Turnout") +
  xlab("Age Group") +
  theme_bw()
dev.off()




#Bivariate DiD POTUS non-POTUS

model_1 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(year) + factor(state),
              cps[cps$presidential_year==1,])

ols_1 <- tidy(model_1)

clusters_1 <- as.character(expand.model.frame(model_1, ~st, na.expand = T)$st)
ols_1$std.error <- commarobust(model_1, clusters = clusters_1, se_type="stata")$std.error

rm(model_1, clusters_1); gc()


model_2 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(year) + factor(state),
              cps[cps$presidential_year==0,])
ols_2 <- tidy(model_2)

clusters_2 <- as.character(expand.model.frame(model_2, ~st, na.expand = T)$st)
ols_2$std.error <- commarobust(model_2, clusters = clusters_2, se_type="stata")$std.error

rm(model_2, clusters_2); gc()

ols_1$presidential_year <- 1
ols_2$presidential_year <- 0

ols_1$estimate[grepl("sdr:", ols_1$term)] <- ols_1$estimate[grepl("sdr:", ols_1$term)] + ols_1$estimate[ols_1$term=="sdr"]
ols_2$estimate[grepl("sdr:", ols_2$term)] <- ols_2$estimate[grepl("sdr:", ols_2$term)] + ols_2$estimate[ols_2$term=="sdr"]

plotdata <- rbind(ols_1[grepl("sdr", ols_1$term),], ols_2[grepl("sdr", ols_2$term),])
plotdata$age_group <- rep(c("65+", "18-24","25-34","35-44","45-54","55-64"), 2)

plotdata[,c("estimate","std.error")] <- 100*plotdata[,c("estimate","std.error")]

plotdata$cihigh <- plotdata$estimate + 1.96*plotdata$std.error
plotdata$cilow <- plotdata$estimate - 1.96*plotdata$std.error

write.csv(plotdata, "POTUS_nonPOTUS_individual_DiD_results_bv.csv", na="")

pd <- position_dodge(.5)


pdf("mfx_presidential_elections_DiD_bv.pdf", h=4, w=6)
ggplot(plotdata, aes(x=age_group, y=estimate, color=factor(presidential_year))) +
  #geom_point(data=data[data$year>=1980 & !is.na(data$sdr),], aes(x=factor(sdr), y=vep.highest.office), alpha=.03) +
  geom_point(size=2.5, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, size=1, position=pd) +
  #facet_wrap(~model) +
  scale_y_continuous(breaks=c(0,5,10), limits=c(min(plotdata$cilow), 10)) +
  #scale_x_discrete(breaks=c(0,1)) +
  scale_color_manual(values=c("grey", "black"), name="Presidential\nElection") +
  geom_hline(yintercept=0, linetype=2) +
  coord_flip() +
  ylab("Effect on Turnout") +
  xlab("Age Group") +
  theme_bw()
dev.off()




##########################Mechanism: Registration (Figure 5)################################

###Plot
gc()

model_1 <- lm_robust(reg_pollingplace ~ sdr*age_group18.24 + 
                       sdr*age_group25.34 + 
                       sdr*age_group35.44 + 
                       sdr*age_group45.54 + 
                       sdr*age_group55.64 + 
                       factor(state) + factor(year), 
                     clusters = st, se_type="stata",
                     cps)

model_2 <- lm_robust(reg_pollingplace ~ sdr*age_group18.24 + 
                       sdr*age_group25.34 + 
                       sdr*age_group35.44 + 
                       sdr*age_group45.54 + 
                       sdr*age_group55.64 + 
                       factor(race) + sex + faminc + educ + 
                       factor(state) + factor(year), 
                     clusters = st, se_type="stata",
                     cps)

ols_1 <- estimatr::tidy(model_1)
ols_2 <- estimatr::tidy(model_2)

ols_1$Model <- "Bivariate"
ols_2$Model <- "Controls"

plotdata <- rbind(ols_1, ols_2)


plotdata <- plotdata[grepl("sdr", plotdata$term),]

plotdata$age_group[grepl("*:", plotdata$term)==F] <- "65+"
plotdata$age_group[grepl("18", plotdata$term)] <- "18\n-\n24"
plotdata$age_group[grepl("25", plotdata$term)] <- "25\n-\n34"
plotdata$age_group[grepl("35", plotdata$term)] <- "35\n-\n44"
plotdata$age_group[grepl("45", plotdata$term)] <- "45\n-\n54"
plotdata$age_group[grepl("55", plotdata$term)] <- "55\n-\n64"

plotdata[,c("estimate","std.error")] <- sapply(plotdata[,c("estimate","std.error")], function(x) as.numeric(as.character(x)))
plotdata[,c("term","age_group")] <- sapply(plotdata[,c("term","age_group")], as.character)

#summing interaction and constituent terms
for(i in levels(factor(plotdata$Model))){
  
  plotdata$estimate[grepl(i, plotdata$Model) & plotdata$age_group!="65+"] <-
    plotdata$estimate[grepl(i, plotdata$Model) & plotdata$age_group!="65+"] +
    plotdata$estimate[grepl(i, plotdata$Model) & plotdata$age_group=="65+"]
  
}

plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)

pdf("did_regpollingplace_plot.pdf", h=3, w=4)
ggplot(plotdata, aes(x=age_group, y=estimate)) +
  geom_point(size=1) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0) +
  facet_wrap(~Model) +
  geom_hline(yintercept=0, linetype=2) +
  #scale_y_continuous(breaks=c(-.05, 0, .05, .1), limits=c(-.05, .1)) +
  xlab("Age") +
  ylab("Effect on Pr(Reg. at Polling Place)") +
  theme_bw()
dev.off()




#################Residential Moving Mechanism####################

pdf("moving_by_age.pdf", h=3, w=3)
ggplot(cps, aes(x=age, y=movedless1yr)) +
  geom_smooth(color="black") +
  #geom_point(alpha=.05) +
  xlab("Age") +
  ylab("Pr(Moved in Last Year)") +
  scale_y_continuous(breaks=c(0,.25,.5)) +
  coord_cartesian(ylim=c(0,.5)) +
  theme_classic()
dev.off()

plotdata <- rbind(tidy(lm(voted ~ movedless1yr + factor(state) +
                                             factor(year), 
                                           cps))[2,],
                  tidy(lm(voted ~ movedless1yr +
                                             age_group18.24 + 
                                             age_group25.34 + 
                                             age_group35.44 + 
                                             age_group45.54 + 
                                             age_group55.64 + 
                                             factor(race) + factor(sex) + faminc + educ + 
                            presidential_year + factor(state) + factor(year), cps))[2,])


plotdata[,c("estimate","std.error")] <- 100*plotdata[,c("estimate","std.error")]

plotdata$cilow <- plotdata$estimate-1.96*plotdata$std.error
plotdata$cihigh <- plotdata$estimate+1.96*plotdata$std.error

plotdata$Model[1] <- "Bivariate"
plotdata$Model[2] <- "Controls"

pdf("moving_turnout.pdf", h=3, w=3)
ggplot(plotdata, aes(x=Model, y=estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0) +
  geom_hline(yintercept = 0, linetype=2) +
  ylab("Estimate") +
  theme_classic()
dev.off()





################Population Back-of-the-Envelope Estimates#####################


did_results <- read.csv("diffindiff_estimate_all_20.6.10.csv", na.strings="")
did_results <- did_results[(did_results$Model=="Individual Level\n(controls)"|
                              did_results$Model=="Individual Level\n(bivariate)"|
                              did_results$Model=="State Level\n(bivariate)"|
                              did_results$Model=="State Level\n(controls)") &
                             did_results$treat=="SDR",]
did_results <- did_results[!is.na(did_results$estimate),]


logit_2 <- glm(voted ~ sdr*age_group18.24 + 
                 sdr*age_group25.34 + 
                 sdr*age_group35.44 + 
                 sdr*age_group45.54 + 
                 sdr*age_group55.64 + 
                 factor(race) + sex + faminc + educ + presidential_year + factor(year), 
               cps, family=binomial(link="logit"))

setwd("..")
pop_proportions <- read.csv("age_pop_proportions_census2010.csv")
setwd("./Output")

gc()

sim_data <- cps[1:6,]

sim_data[,grepl("age_group", names(sim_data))] <- 0
sim_data$faminc <- mean(cps$faminc, na.rm=T)
sim_data$sex <- 1
sim_data$sdr <- 0
sim_data$year <- 2014
sim_data$educ <- mean(cps$educ, na.rm=T)

sim_data$age_group <- pop_proportions$age_group

for(i in names(sim_data)[grepl("age_group", names(sim_data))][2:7]){
  
  sim_data[which(names(sim_data)[grepl("age_group", names(sim_data))][2:7]==i),i] <- 1
  
}

sim_data <- rbind(sim_data, sim_data)
sim_data$sdr[7:12] <- 1

preds <- data.frame(predict(logit_2, newdata=sim_data, se.fit=T, type="response"))
preds$age_group <- rep(pop_proportions$age_group, 2)


preds$fit_wtd <- preds$fit * rep(pop_proportions$proportion_100,2)

preds$fit_prop_electorate[1:6] <- preds$fit_wtd[1:6]/sum(preds$fit_wtd[1:6])
preds$fit_prop_electorate[7:12] <- preds$fit_wtd[7:12]/sum(preds$fit_wtd[7:12])
preds$sdr[1:6] <- "No SDR"
preds$sdr[7:12] <- "SDR"

plotdata <- preds[1:6,]
plotdata$fit_prop_electorate <- (preds$fit_prop_electorate[7:12] - plotdata$fit_prop_electorate)*100
plotdata$se.fit <- plotdata$se.fit*100

plotdata$cilow <- plotdata$fit_prop_electorate - 1.96*plotdata$se.fit
plotdata$cihigh <- plotdata$fit_prop_electorate + 1.96*plotdata$se.fit


newdata <- model.frame(voted ~ sdr*age_group18.24 + 
                         sdr*age_group25.34 + 
                         sdr*age_group35.44 + 
                         sdr*age_group45.54 + 
                         sdr*age_group55.64 + 
                         factor(race) + sex + faminc + educ + presidential_year + factor(year), data=cps)

newdata <- join(newdata, cps[1:100,grepl("age_group", names(cps))], match="first")

newdata$sdr <- 0
names(newdata)[c(8,13)] <- c("race","year")

newdata$preds_nosdr <- predict(logit_2, newdata=newdata, se.fit=F, type="response")

newdata$sdr <- 1
newdata$preds_sdr <- predict(logit_2, newdata=newdata, se.fit=F, type="response")

agg <- aggregate(cbind(preds_nosdr, preds_sdr)~age_group, newdata, mean, na.rm=T)
agg$model <- "Logit"

agg$std.error <- 100*preds$se.fit[!is.na(preds$se.fit)][7:12]

write.csv(agg, "mfx_predicted_turnout_cps_logit.csv")



did_results <- did_results[order(did_results$Model, did_results$age_group),]

plotdata <- agg
plotdata <- rbind(plotdata, plotdata, plotdata)
plotdata$model[7:12] <- "Diff-in-Diff"
plotdata$model[13:18] <- "Diff-in-Diff w/ controls"

plotdata$preds_sdr[7:12] <- plotdata$preds_nosdr[7:12] + (did_results$estimate[did_results$Model=="State Level\n(controls)"]/100)
plotdata$preds_sdr[13:18] <- plotdata$preds_nosdr[13:18] + (did_results$estimate[did_results$Model=="Individual Level\n(controls)"]/100)

plotdata$std.error[7:12] <- did_results$std.error[did_results$Model=="Individual Level\n(bivariate)"]
plotdata$std.error[13:18] <- did_results$std.error[did_results$Model=="Individual Level\n(controls)"]

for(i in levels(factor(plotdata$model))){
  
  plotdata[plotdata$model==i,c("preds_nosdr","preds_sdr")] <- 
    (plotdata[plotdata$model==i,c("preds_nosdr","preds_sdr")]*pop_proportions$proportion_100)
  
  plotdata$preds_nosdr[plotdata$model==i] <- 100*plotdata$preds_nosdr[plotdata$model==i]/sum(plotdata$preds_nosdr[plotdata$model==i])
  plotdata$preds_sdr[plotdata$model==i] <- 100*plotdata$preds_sdr[plotdata$model==i]/sum(plotdata$preds_sdr[plotdata$model==i])
  
  plotdata$diff[plotdata$model==i] <- plotdata$preds_sdr[plotdata$model==i]-plotdata$preds_nosdr[plotdata$model==i]
  plotdata$cilow[plotdata$model==i] <- plotdata$diff[plotdata$model==i] - (1.96*plotdata$std.error[plotdata$model==i]/4)
  plotdata$cihigh[plotdata$model==i] <- plotdata$diff[plotdata$model==i] + (1.96*plotdata$std.error[plotdata$model==i]/4)
  
}

plotdata$Model <- plotdata$model

pd <- position_dodge(.5)

pdf("mfx_pop_proportions.pdf", h=4, w=6)
ggplot(plotdata[plotdata$Model!="Logit",], aes(x=age_group, y=diff, shape=Model)) +
  #geom_point(data=data[data$year>=1980 & !is.na(data$sdr),], aes(x=factor(sdr), y=vep.highest.office), alpha=.03) +
  geom_point(size=2.5, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, position=pd) +
  #scale_color_manual(values=c("grey", "grey", "black")) +
  #facet_wrap(~term) +
  #scale_y_continuous(breaks=c(0,5,10,15), limits=c(0,15)) +
  #scale_y_continuous(breaks=c(-1, -.5, 0, .5, 1)) +
  #scale_shape_manual(values=c(16,17,15), name="Model") +
  geom_hline(yintercept=0, linetype=2) +
  coord_flip() +
  ylab("Change in Percentage of Electorate") +
  xlab("Age Group") +
  theme_bw()
dev.off()

write.csv(plotdata, "mfx_pop_proportions.csv")
















